from get_chrome_jigou import get_chrombrowser
from parsel import Selector
import time
import csv
import pandas as pd
import random


def _extract_fields(yiyuan, yiyuan_soup):
    """Scrape the 17-column registration row from a company detail page.

    Args:
        yiyuan: The company name that was searched (first column of the row).
        yiyuan_soup: parsel Selector over the detail page's HTML.

    Returns:
        A 17-element list: [searched name, registered name, unified social
        credit code, legal representative, operating status, district,
        registered capital, company type, organization code, taxpayer ID,
        business registration number, industry, business scope, registered
        address, phone, email, founding date]. Fields missing from the page
        come back as None (csv writes them as empty cells).
    """
    name = yiyuan_soup.xpath(
        'normalize-space(//td[text()="企业名称"]/following-sibling::td[1]/span/text())').extract_first()
    xinyon_code = yiyuan_soup.xpath(
        'normalize-space(//div[contains(text(),"统一社会信用代码") and @class="ivu-poptip-rel"]/../../following-sibling::td[1]/span/text())').extract_first()
    faren = yiyuan_soup.xpath(
        'normalize-space(//div[contains(text(),"法定代表人：")]/a/text())').extract_first()
    zhuantai = yiyuan_soup.xpath(
        '//td[text()="经营状态"]/following-sibling::td[1]/text()').extract_first()
    area = yiyuan_soup.xpath(
        '//td[text()="行政区划"]/following-sibling::td[1]/text()').extract_first()
    zhuce_ziben = yiyuan_soup.xpath(
        'normalize-space(//td[text()="注册资本"]/following-sibling::td[1]/text())').extract_first()
    leixin = yiyuan_soup.xpath(
        'normalize-space(//td[text()="企业类型"]/following-sibling::td[1]/text())').extract_first()
    jigou_code = yiyuan_soup.xpath(
        '//div[contains(text(),"组织机构代码") and @class="ivu-poptip-rel"]/../../following-sibling::td[1]/span/text()').extract_first()
    nashui_hao = yiyuan_soup.xpath(
        'normalize-space(//div[contains(text(),"纳税人识别号") and @class="ivu-poptip-rel"]/../../following-sibling::td[1]/span/text())').extract_first()
    hanye = yiyuan_soup.xpath(
        'normalize-space(//td[text()="所属行业"]/following-sibling::td[1]/text())').extract_first()
    jinyin = yiyuan_soup.xpath(
        'normalize-space(//td[text()="经营范围"]/following-sibling::td[1]/div/text())').extract_first()
    dizhi = yiyuan_soup.xpath(
        '//td[text()="注册地址"]/following-sibling::td[1]/span/text()').extract_first()
    gonshanhao = yiyuan_soup.xpath(
        'normalize-space(//td[text()="工商注册号"]/following-sibling::td[1]/span/text())').extract_first()
    phone = yiyuan_soup.xpath(
        'normalize-space(//span[@data-log-title="detail-head-phone"]/span/text())').extract_first()
    email = yiyuan_soup.xpath(
        'normalize-space(//a[@data-log-an="detail-head-email"]/text())').extract_first()
    chenli_riqi = yiyuan_soup.xpath(
        'normalize-space(//td[text()="成立日期"]/following-sibling::td[1]/text())').extract_first()
    return [yiyuan, name, xinyon_code, faren, zhuantai, area, zhuce_ziben,
            leixin, jigou_code, nashui_hao, gonshanhao, hanye, jinyin, dizhi,
            phone, email, chenli_riqi]


def parse_yiyuan(yiyuan, chrome):
    """Look up one company's business-registration info on aiqicha.baidu.com.

    Searches for *yiyuan*, opens the first result card in a new tab, scrapes
    the registration fields, appends one 17-column row to the output CSV,
    then closes the tab and returns to the search window. If the search
    yields no result card, an all-blank row (same width) is written instead.

    Args:
        yiyuan: Company/agency name to search for.
        chrome: A logged-in Selenium WebDriver on aiqicha.baidu.com.
    """
    print(f'开始查询：{yiyuan}_工商信息')
    url_str = r'https://aiqicha.baidu.com/s?q=%s&t=0'
    url = url_str % yiyuan
    chrome.get(url)
    time.sleep(3)  # let the search results render
    soup = Selector(chrome.page_source)
    # First result card's link; None when the query matched nothing.
    href = soup.xpath('//div[@class="card"][1]//a/@href').extract_first()
    if href:
        yiyuan_url = 'https://aiqicha.baidu.com' + href
        js_open = f'''window.open('{yiyuan_url}')'''
        chrome.execute_script(js_open)
        handles = chrome.window_handles
        main_handle = handles[0]
        chrome.switch_to.window(handles[-1])
        try:
            time.sleep(2)  # let the detail page render
            data = _extract_fields(yiyuan, Selector(chrome.page_source))
            print(data)
            print(f'{yiyuan}_工商信息查完')
        finally:
            # Always close the detail tab and return to the search window,
            # even if scraping raised — otherwise every later query would
            # run against the wrong (dead) window handle.
            chrome.close()
            chrome.switch_to.window(main_handle)
    else:
        print(f'{yiyuan}_未查询到工商信息')
        # Pad with 16 blanks so the row has the same 17 columns as the
        # success branch (the original wrote only 14, misaligning the CSV).
        data = [yiyuan] + [''] * 16
    with open('爱企查_武汉高新企业工商信息.csv', 'a+', encoding='utf-8', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(data)

    # Randomized pause between queries to avoid hammering the site.
    time.sleep(random.randint(2, 4))


if __name__ == '__main__':
    # Load the list of agency names to query from the local spreadsheet.
    df = pd.read_excel(
        r'C:\Users\Administrator\Desktop\2022湖北省代理机构-120家.xls', sheet_name='2022湖北省代理机构')
    yiyuan_list = list(df['机构名称'])
    chrome = get_chrombrowser()
    login_url = r'https://aiqicha.baidu.com/'
    chrome.get(login_url)
    # Manual QR-code login before scraping starts.
    input('@@请扫码登陆:')
    try:
        for yiyuan in yiyuan_list:
            parse_yiyuan(yiyuan, chrome)
    finally:
        # Close the browser even if a mid-run query raises, so no
        # orphaned chromedriver/Chrome processes are left behind.
        chrome.quit()
